#Research Question: Do any supplements cause a significant mean decrease in human gut pH from week 1 (pre-supplement) to week 3 (post-supplement)?
library(tidyverse)
library(readxl)
library(broom)
library(cowplot)
library(phyloseq); packageVersion("phyloseq")
set.seed(7)
#Import Data
#Import Data
shared_wkly <- read_delim(file = "raw_data/shared_wkly.txt",
delim = "\t", col_names = TRUE, trim_ws = TRUE,
na = c("", "NA"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## Participant_ID = col_character(),
## Semester = col_character(),
## Study_week = col_character(),
## Quantity_compliant = col_character(),
## Frequency = col_character(),
## Supplement_consumed = col_character()
## )
## See spec(...) for full column specifications.
biographical <- read_delim(file = "raw_data/biographical.txt",
delim = "\t", col_names = TRUE, trim_ws = TRUE,
na = c("", "NA"))
## Parsed with column specification:
## cols(
## Participant_ID = col_character(),
## Use_Data = col_character(),
## Sex = col_character(),
## Age = col_double(),
## Race_ethnicity = col_character(),
## Weight_kg = col_double(),
## Height_meters = col_double(),
## BMI = col_double()
## )
ph_wkly <- read_delim(file = "raw_data/DB_v_0.08/pH_wkly.txt",
delim = "\t", col_names = TRUE, trim_ws = TRUE,
na = c("", "NA"))
## Parsed with column specification:
## cols(
## Participant_ID = col_character(),
## Study_week = col_character(),
## Status = col_character(),
## Semester = col_character(),
## Supplement_consumed = col_character(),
## Use_Data = col_character(),
## Quantity_compliant = col_character(),
## Frequency = col_character(),
## pH_median = col_double(),
## pH_mean = col_double()
## )
#Look at pH Data
ph_wkly
## # A tibble: 687 x 10
## Participant_ID Study_week Status Semester Supplement_cons… Use_Data
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 U400 week3 during Fall2017 Banana yes
## 2 U401 week3 during Fall2017 Banana yes
## 3 U402 week3 during Fall2017 Banana yes
## 4 U404 week3 during Fall2017 Banana yes
## 5 U406 week3 during Fall2017 none yes
## 6 U407 week3 during Fall2017 Banana yes
## 7 U408 week3 during Fall2017 Banana yes
## 8 U411 week3 during Fall2017 Banana yes
## 9 U413 week3 during Fall2017 Banana yes
## 10 U414 week3 during Fall2017 Banana yes
## # … with 677 more rows, and 4 more variables: Quantity_compliant <chr>,
## # Frequency <chr>, pH_median <dbl>, pH_mean <dbl>
count(ph_wkly, Supplement_consumed)
## # A tibble: 7 x 2
## Supplement_consumed n
## <chr> <int>
## 1 Banana 32
## 2 BRMPS 365
## 3 HiMaize+BRMPS 93
## 4 LOODAT 35
## 5 none 73
## 6 transition_HiMaize 87
## 7 <NA> 2
count(ph_wkly, Frequency)
## # A tibble: 4 x 2
## Frequency n
## <chr> <int>
## 1 - 73
## 2 1xdaily 335
## 3 2xdaily 277
## 4 <NA> 2
There are 6 supplements: 1. Banana (n=32) 2. BRMPS (n=365) 3. HiMaize+BRMPS (n=93) 4. LOODAT (n=35) 5. none (n=73) 6. transition_HiMaize (n=87) and 2 with NA (remove)
#Curate pH Weekly Data
#Rename to our standards and filter data
curated_ph_wkly <- ph_wkly %>%
rename_all(tolower) %>%
filter(quantity_compliant == "yes" | quantity_compliant == "none",
use_data == "yes",
study_week == "week1" | study_week == "week3",
frequency != "NA")
View(curated_ph_wkly)
write_delim(curated_ph_wkly, path = "curated_data/curated_ph_wkly.txt", delim = "\t")
Now we have a common standard for all of our data. From here, we can further subset for specific conditions, but this data set contains the general conditions for all further calculations.
#Supplement Specific Assumption & Statistical Testing with Plots
It is important to note that there was no pH data for participants who consumed Bananas for week 1, and thus we could not compute a statistical analysis on it.
#2x Daily BRMPS
#Further Curation
brmps_wk1_2x <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "BRMPS",
frequency == "2xdaily")
brmps_wk3_2x <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "BRMPS",
frequency == "2xdaily")
#Normality
shapiro.test(brmps_wk1_2x$ph_mean) #p-value = .3826
##
## Shapiro-Wilk normality test
##
## data: brmps_wk1_2x$ph_mean
## W = 0.96172, p-value = 0.3826
shapiro.test(brmps_wk3_2x$ph_mean) #p-value = .006713
##
## Shapiro-Wilk normality test
##
## data: brmps_wk3_2x$ph_mean
## W = 0.93851, p-value = 0.006713
#Normality Plots
ggplot(brmps_wk1_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(brmps_wk3_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(brmps_wk1_2x$ph_mean); qqline(brmps_wk1_2x$ph_mean)
qqnorm(brmps_wk3_2x$ph_mean); qqline(brmps_wk3_2x$ph_mean)
#Final Joined Data
brmps_2x <- inner_join(x = brmps_wk1_2x, y = brmps_wk3_2x,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(brmps_ph_mean_2x_wk1 = ph_mean.x,
brmps_ph_mean_2x_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(brmps_2x) # n=27
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 27 obs. of 13 variables:
## $ participant_id : chr "U500" "U501" "U502" "U507" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
## $ supplement_consumed : chr "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "yes" "yes" "yes" "yes" ...
## $ frequency : chr "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
## $ ph_median.x : num 6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
## $ brmps_ph_mean_2x_wk1: num 6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
## $ brmps_ph_mean_2x_wk3: num 6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
var.test(x = brmps_2x$brmps_ph_mean_2x_wk1,
y = brmps_2x$brmps_ph_mean_2x_wk3,
alternative = "greater") #p-value = .9512
##
## F test to compare two variances
##
## data: brmps_2x$brmps_ph_mean_2x_wk1 and brmps_2x$brmps_ph_mean_2x_wk3
## F = 0.51589, num df = 26, denom df = 26, p-value = 0.9512
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.2674087 Inf
## sample estimates:
## ratio of variances
## 0.5158883
#Statistical Tests
wilcox.test(x = brmps_2x$brmps_ph_mean_2x_wk1,
y = brmps_2x$brmps_ph_mean_2x_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x = brmps_2x$brmps_ph_mean_2x_wk1, y =
## brmps_2x$brmps_ph_mean_2x_wk3, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = brmps_2x$brmps_ph_mean_2x_wk1, y =
## brmps_2x$brmps_ph_mean_2x_wk3, : cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: brmps_2x$brmps_ph_mean_2x_wk1 and brmps_2x$brmps_ph_mean_2x_wk3
## V = 252, p-value = 0.0002761
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.0002761
#BRMPS 2x Daily Plot
brmps2_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
study_week == "week1" | study_week == "week3",
frequency == "2xdaily") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH after BRMPS Supplement (2x daily)") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
plot = brmps2_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
brmps2_plot
We have an extremely low p-value (less than alpha = .001) indicating that BRMPS taken 2x daily did lower pH in participants from week 1 to week 3 on average. In calculating the conclusion, we decided to use a non-parametric t-test (wilcox) because week 3 had non-normal data and week 1 did not follow the Normal Q-Q plot very accurately and the histogram had a slight bimodal look. Additionally because the sample size was less than 30 we decided to be safe and use the non-parametric test; variences were equal though for this test.
#2x Daily Transition Maize
#Further Curation
transition_wk1_2x <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "transition_HiMaize",
frequency == "2xdaily")
transition_wk3_2x <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "transition_HiMaize",
frequency == "2xdaily")
#Normaily + Varience Assumptions
shapiro.test(transition_wk1_2x$ph_mean) #p-value = .8910
##
## Shapiro-Wilk normality test
##
## data: transition_wk1_2x$ph_mean
## W = 0.98083, p-value = 0.891
shapiro.test(transition_wk3_2x$ph_mean) #p-value = .5083
##
## Shapiro-Wilk normality test
##
## data: transition_wk3_2x$ph_mean
## W = 0.96074, p-value = 0.5043
#Normality Plots
ggplot(transition_wk1_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(transition_wk3_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(transition_wk1_2x$ph_mean); qqline(transition_wk1_2x$ph_mean)
qqnorm(transition_wk3_2x$ph_mean); qqline(transition_wk3_2x$ph_mean)
#Final Joined Data
transition_2x <- inner_join(x = transition_wk1_2x, y = transition_wk3_2x,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(transition_ph_mean_2x_wk1 = ph_mean.x,
transition_ph_mean_2x_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(transition_2x) # n=21
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 21 obs. of 13 variables:
## $ participant_id : chr "U547" "U548" "U550" "U551" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
## $ supplement_consumed : chr "transition_HiMaize" "transition_HiMaize" "transition_HiMaize" "transition_HiMaize" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "yes" "yes" "yes" "yes" ...
## $ frequency : chr "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
## $ ph_median.x : num 7.05 6.4 5.8 6.6 6 6.6 6.4 6.4 6.6 6.8 ...
## $ transition_ph_mean_2x_wk1: num 7.05 6.4 5.8 6.6 6 6.6 6.4 6.4 6.6 6.8 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 7.2 6.2 6.6 6.2 6.2 6.2 5.4 6.3 6 6.2 ...
## $ transition_ph_mean_2x_wk3: num 7.2 6.2 6.6 6.2 6.2 6.2 5.4 6.3 5.87 6.2 ...
var.test(x = transition_2x$transition_ph_mean_2x_wk1,
y = transition_2x$transition_ph_mean_2x_wk3,
alternative = "greater") #p-value = .9395
##
## F test to compare two variances
##
## data: transition_2x$transition_ph_mean_2x_wk1 and transition_2x$transition_ph_mean_2x_wk3
## F = 0.49206, num df = 20, denom df = 20, p-value = 0.9395
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.2316492 Inf
## sample estimates:
## ratio of variances
## 0.4920589
#Statistical Tests
t.test(x = transition_2x$transition_ph_mean_2x_wk1, y = transition_2x$transition_ph_mean_2x_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
##
## Paired t-test
##
## data: transition_2x$transition_ph_mean_2x_wk1 and transition_2x$transition_ph_mean_2x_wk3
## t = 2.6789, df = 20, p-value = 0.007215
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.1031209 Inf
## sample estimates:
## mean of the differences
## 0.2895238
#p-value = 0.007215
#Transition HiMaize 2x Daily Plot
transition_himaize_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "transition_HiMaize",
study_week == "week1" | study_week == "week3") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week)) +
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH after 2x Transition HiMaize") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "transition_himaize_plot.pdf",
plot = transition_himaize_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
transition_himaize_plot
After running significance testing for Transition Maize taken 2x daily we found that there was a signifiant p-value (less than alpha = .01) providing evidence to reject the null hypothesis and conclude that this supplement did decrease the pH in participants from week 1 to week 3 on average. The sample size was not ideal, but both shapiro tests indicated that each subsetted data set was approximately normal which we believed was sufficient evidence to carry out a parametric test (t-test); the variences were equal for the data as well.
#1x Daily BRMPS
#Further Curation
brmps_wk1_1x <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "BRMPS",
frequency == "1xdaily")
brmps_wk3_1x <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "BRMPS",
frequency == "1xdaily")
#Normaily + Varience Assumptions
shapiro.test(brmps_wk1_1x$ph_mean) #p-value = 0.01669
##
## Shapiro-Wilk normality test
##
## data: brmps_wk1_1x$ph_mean
## W = 0.9533, p-value = 0.01669
shapiro.test(brmps_wk3_1x$ph_mean) #p-value = .08154
##
## Shapiro-Wilk normality test
##
## data: brmps_wk3_1x$ph_mean
## W = 0.96627, p-value = 0.08154
#Normality Plots
ggplot(brmps_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(brmps_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(brmps_wk1_1x$ph_mean); qqline(brmps_wk1_1x$ph_mean)
qqnorm(brmps_wk3_1x$ph_mean); qqline(brmps_wk3_1x$ph_mean)
#Final Joined Data
brmps_1x <- inner_join(x = brmps_wk1_1x, y = brmps_wk3_1x,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(brmps_ph_mean_1x_wk1 = ph_mean.x,
brmps_ph_mean_1x_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(brmps_1x) # n=62
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 62 obs. of 13 variables:
## $ participant_id : chr "U602" "U603" "U605" "U606" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Fall2018" "Fall2018" "Fall2018" "Fall2018" ...
## $ supplement_consumed : chr "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "yes" "yes" "yes" "yes" ...
## $ frequency : chr "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
## $ ph_median.x : num 6.38 7.25 7.5 7.5 6.88 7.25 7.5 6.25 6.75 5.88 ...
## $ brmps_ph_mean_1x_wk1: num 6.38 7.25 7.5 7.5 6.88 7.25 7.5 6.25 6.75 5.88 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 6.38 6.88 7.12 6.75 7.75 7 6.5 6.12 7.5 6.25 ...
## $ brmps_ph_mean_1x_wk3: num 6.38 6.88 7.12 6.75 7.75 7 6.5 6.12 7.5 6.25 ...
var.test(x = brmps_1x$brmps_ph_mean_1x_wk1,
y = brmps_1x$brmps_ph_mean_1x_wk3,
alternative = "greater") #p-value = .6063
##
## F test to compare two variances
##
## data: brmps_1x$brmps_ph_mean_1x_wk1 and brmps_1x$brmps_ph_mean_1x_wk3
## F = 0.93301, num df = 61, denom df = 61, p-value = 0.6063
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.6102759 Inf
## sample estimates:
## ratio of variances
## 0.93301
#Statistical Tests
t.test(x = brmps_1x$brmps_ph_mean_1x_wk1, y = brmps_1x$brmps_ph_mean_1x_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
##
## Paired t-test
##
## data: brmps_1x$brmps_ph_mean_1x_wk1 and brmps_1x$brmps_ph_mean_1x_wk3
## t = 1.2214, df = 61, p-value = 0.1133
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.03371922 Inf
## sample estimates:
## mean of the differences
## 0.09177419
#p-value = 0.1133
#BRMPS 1x Daily Plot
brmps1_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
study_week == "week1" | study_week == "week3",
frequency == "1xdaily") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH after BRMPS Supplement (1x daily)") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
plot = brmps1_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
brmps1_plot
Although close to our alpha value of .10, we still could not reject our null hypothesis because the p-value was higher than the alpha value. Thus, BRMPS consumed 1x daily did not appear to significantly decrease the pH in participants’ guts on average from week 1 to week 3. We decided that a parametric test would be sufficient since the qq plots showed normal data and the histograms appeared to be approximately normal, even thought the sample size was not greater than 30. Additionally, we found the variences to be equal between the subsetted data sets.
#1x Daily HiMaize+BRMPS
#Further Curation
himaize_brmps_wk1_1x <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "HiMaize+BRMPS",
frequency == "1xdaily")
himaize_brmps_wk3_1x <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "HiMaize+BRMPS",
frequency == "1xdaily")
#Normaily + Varience Assumptions
shapiro.test(himaize_brmps_wk1_1x$ph_mean) #p-value = .2955
##
## Shapiro-Wilk normality test
##
## data: himaize_brmps_wk1_1x$ph_mean
## W = 0.95021, p-value = 0.2955
shapiro.test(himaize_brmps_wk3_1x$ph_mean) #p-value = .05622
##
## Shapiro-Wilk normality test
##
## data: himaize_brmps_wk3_1x$ph_mean
## W = 0.91655, p-value = 0.05622
#Normality Plots
ggplot(himaize_brmps_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(himaize_brmps_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(himaize_brmps_wk1_1x$ph_mean); qqline(himaize_brmps_wk1_1x$ph_mean)
qqnorm(himaize_brmps_wk3_1x$ph_mean); qqline(himaize_brmps_wk3_1x$ph_mean)
#Final Joined Data
himaize_brmps_1x <- inner_join(x = himaize_brmps_wk1_1x, y = himaize_brmps_wk3_1x,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(himaize_brmps_ph_mean_1x_wk1 = ph_mean.x,
himaize_brmps_ph_mean_1x_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(himaize_brmps_1x) # n=23
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 23 obs. of 13 variables:
## $ participant_id : chr "U643" "U644" "U645" "U646" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Fall2018" "Fall2018" "Fall2018" "Fall2018" ...
## $ supplement_consumed : chr "HiMaize+BRMPS" "HiMaize+BRMPS" "HiMaize+BRMPS" "HiMaize+BRMPS" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "yes" "yes" "yes" "yes" ...
## $ frequency : chr "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
## $ ph_median.x : num 7.75 7.5 7.1 6.5 6.62 7.65 6.75 7.38 6.88 7.75 ...
## $ himaize_brmps_ph_mean_1x_wk1: num 7.75 7.5 7.1 6.5 6.62 7.65 6.75 7.38 6.88 7.75 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 6.38 7.25 7.5 7.75 6.25 7.5 6.5 7.38 6 6.75 ...
## $ himaize_brmps_ph_mean_1x_wk3: num 6.38 7.25 7.5 7.75 6.25 7.5 6.5 7.38 6 6.75 ...
var.test(x = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1,
y = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3,
alternative = "greater") #p-value = .489
##
## F test to compare two variances
##
## data: himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1 and himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3
## F = 1.012, num df = 22, denom df = 22, p-value = 0.489
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.4941971 Inf
## sample estimates:
## ratio of variances
## 1.012002
#Statistical Tests
wilcox.test(x = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1,
y = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, : cannot compute exact p-
## value with ties
## Warning in wilcox.test.default(x =
## himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, : cannot compute exact p-
## value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1 and himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3
## V = 162.5, p-value = 0.05289
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.05289
#HiMaize+BRMPS 1x Daily Plot
himaize_brmps_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "HiMaize+BRMPS",
study_week == "week1" | study_week == "week3") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH after HiMaize+BRMPS Supplement") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
plot = himaize_brmps_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
himaize_brmps_plot
Since we are looking at complex and diverse biological systems, we thought it was reasonable to use an alpha value of .10. Thus, in this case we found a significant p-value that was less than alpha = .10 providing us with sufficient evidence to reject the null hypothesis and accept the alternative concluding that consuming HiMaize+BRMPS supplement 1x daily did decrease pH on average in individuals from week 1 to week 3. We used a non-parametric test (wilcox) with equal variences because week 3 had non-normal data according to the shapiro test and the graph for week 1 appeared to be bimodal in addition to a low sample size.
#1x Daily LOODAT
#Further Curation
loodat_wk1_1x <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "LOODAT",
frequency == "1xdaily")
loodat_wk3_1x <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "LOODAT",
frequency == "1xdaily")
#Normaily + Varience Assumptions
shapiro.test(loodat_wk1_1x$ph_mean) #p-value = .1888
##
## Shapiro-Wilk normality test
##
## data: loodat_wk1_1x$ph_mean
## W = 0.91943, p-value = 0.1888
shapiro.test(loodat_wk3_1x$ph_mean) #p-value = .0213
##
## Shapiro-Wilk normality test
##
## data: loodat_wk3_1x$ph_mean
## W = 0.85622, p-value = 0.0213
#Normality Plots
ggplot(loodat_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(loodat_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(loodat_wk1_1x$ph_mean); qqline(loodat_wk1_1x$ph_mean)
qqnorm(loodat_wk3_1x$ph_mean); qqline(loodat_wk3_1x$ph_mean)
#Final Joined Data
loodat_1x <- inner_join(x = loodat_wk1_1x, y = loodat_wk3_1x,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(loodat_ph_mean_1x_wk1 = ph_mean.x,
loodat_ph_mean_1x_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(loodat_1x) # n=15
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 15 obs. of 13 variables:
## $ participant_id : chr "U701" "U706" "U707" "U708" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Winter2019" "Winter2019" "Winter2019" "Winter2019" ...
## $ supplement_consumed : chr "LOODAT" "LOODAT" "LOODAT" "LOODAT" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "yes" "yes" "yes" "yes" ...
## $ frequency : chr "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
## $ ph_median.x : num 7.5 7.5 7.5 7 7.25 5.25 6.75 7.25 6 6.25 ...
## $ loodat_ph_mean_1x_wk1: num 7.33 7.42 7.08 7 7.33 5.33 6.67 7.25 6 6.5 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 7.25 7.5 7.62 7 7.5 6.25 7.25 7.25 6.25 6.25 ...
## $ loodat_ph_mean_1x_wk3: num 7.5 7.33 7.62 7.17 7.17 6.25 7.17 7.25 6.17 6.25 ...
var.test(x = loodat_1x$loodat_ph_mean_1x_wk1,
y = loodat_1x$loodat_ph_mean_1x_wk3,
alternative = "greater") #p-value = .3212
##
## F test to compare two variances
##
## data: loodat_1x$loodat_ph_mean_1x_wk1 and loodat_1x$loodat_ph_mean_1x_wk3
## F = 1.2878, num df = 14, denom df = 14, p-value = 0.3212
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.5184902 Inf
## sample estimates:
## ratio of variances
## 1.287787
#Statistical Tests
wilcox.test(x = loodat_1x$loodat_ph_mean_1x_wk1,
y = loodat_1x$loodat_ph_mean_1x_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x = loodat_1x$loodat_ph_mean_1x_wk1, y =
## loodat_1x$loodat_ph_mean_1x_wk3, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = loodat_1x$loodat_ph_mean_1x_wk1, y =
## loodat_1x$loodat_ph_mean_1x_wk3, : cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: loodat_1x$loodat_ph_mean_1x_wk1 and loodat_1x$loodat_ph_mean_1x_wk3
## V = 27.5, p-value = 0.9024
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.9024
#LOODAT 1x Daily Plot
loodat_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "LOODAT",
study_week == "week1" | study_week == "week3",
frequency == "1xdaily") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH after LOODAT Supplement") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
plot = loodat_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
loodat_plot
The p-value here was far from significant, which would have required it to be lower than alpha = .10. Since it was above this value, we cannot reject the null hypothesis and therefore must conclude that consuming LOODAT 1x daily did not decrease the pH on average in participants’ guts from week 1 to week 3. We used a non-parametric test (wilcox) with equal variences because both subsetted data sets had bimodal data and there was a low sample size.
#No Supplement Consumed
#Further Curation
no_supplement_wk1 <- curated_ph_wkly %>%
filter(study_week == "week1",
supplement_consumed == "none",
frequency == "-")
no_supplement_wk3 <- curated_ph_wkly %>%
filter(study_week == "week3",
supplement_consumed == "none",
frequency == "-")
#Normaily + Varience Assumptions
shapiro.test(no_supplement_wk1$ph_mean) #p-value = .1029
##
## Shapiro-Wilk normality test
##
## data: no_supplement_wk1$ph_mean
## W = 0.93318, p-value = 0.1029
shapiro.test(no_supplement_wk3$ph_mean) #p-value = .03968
##
## Shapiro-Wilk normality test
##
## data: no_supplement_wk3$ph_mean
## W = 0.93654, p-value = 0.03968
#Normality Plots
ggplot(no_supplement_wk1, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(no_supplement_wk3, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(no_supplement_wk1$ph_mean); qqline(no_supplement_wk1$ph_mean)
qqnorm(no_supplement_wk3$ph_mean); qqline(no_supplement_wk3$ph_mean)
#Final Joined Data
no_supplement <- inner_join(x = no_supplement_wk1, y = no_supplement_wk3,
by = c("participant_id", "frequency",
"semester", "supplement_consumed", "quantity_compliant")) %>%
rename(no_supplement_ph_mean_wk1 = ph_mean.x,
no_supplement_ph_mean_wk3 = ph_mean.y) %>%
select(-starts_with("study_week"))
no_supplement
## # A tibble: 24 x 13
## participant_id status.x semester supplement_cons… use_data.x
## <chr> <chr> <chr> <chr> <chr>
## 1 U513 before Winter2… none yes
## 2 U544 before Winter2… none yes
## 3 U562 before Winter2… none yes
## 4 U566 before Winter2… none yes
## 5 U618 before Fall2018 none yes
## 6 U637 before Fall2018 none yes
## 7 U650 before Fall2018 none yes
## 8 U663 before Fall2018 none yes
## 9 U665 before Fall2018 none yes
## 10 U668 before Fall2018 none yes
## # … with 14 more rows, and 8 more variables: quantity_compliant <chr>,
## # frequency <chr>, ph_median.x <dbl>, no_supplement_ph_mean_wk1 <dbl>,
## # status.y <chr>, use_data.y <chr>, ph_median.y <dbl>,
## # no_supplement_ph_mean_wk3 <dbl>
#Sample Size + Varience Assumptions
str(no_supplement) # n=24
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 24 obs. of 13 variables:
## $ participant_id : chr "U513" "U544" "U562" "U566" ...
## $ status.x : chr "before" "before" "before" "before" ...
## $ semester : chr "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
## $ supplement_consumed : chr "none" "none" "none" "none" ...
## $ use_data.x : chr "yes" "yes" "yes" "yes" ...
## $ quantity_compliant : chr "none" "none" "none" "none" ...
## $ frequency : chr "-" "-" "-" "-" ...
## $ ph_median.x : num 6.5 6.8 6.2 6.2 7.5 6.12 6.25 6.38 7.25 8 ...
## $ no_supplement_ph_mean_wk1: num 6.5 6.8 6.2 6.2 7.5 6.12 6.25 6.38 7.25 8 ...
## $ status.y : chr "during" "during" "during" "during" ...
## $ use_data.y : chr "yes" "yes" "yes" "yes" ...
## $ ph_median.y : num 6.55 6.4 6.2 7 7.75 6 7.75 7.5 6.5 8 ...
## $ no_supplement_ph_mean_wk3: num 6.55 6.4 6.2 7 7.75 6 7.75 7.5 6.5 8 ...
var.test(x = no_supplement$no_supplement_ph_mean_wk1,
y = no_supplement$no_supplement_ph_mean_wk3,
alternative = "greater") #p-value = 0.2721
##
## F test to compare two variances
##
## data: no_supplement$no_supplement_ph_mean_wk1 and no_supplement$no_supplement_ph_mean_wk3
## F = 1.2918, num df = 23, denom df = 23, p-value = 0.2721
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.6412776 Inf
## sample estimates:
## ratio of variances
## 1.291806
#Statistical Tests
wilcox.test(x = no_supplement$no_supplement_ph_mean_wk1,
y = no_supplement$no_supplement_ph_mean_wk3,
alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## no_supplement$no_supplement_ph_mean_wk1, : cannot compute exact p-value
## with ties
## Warning in wilcox.test.default(x =
## no_supplement$no_supplement_ph_mean_wk1, : cannot compute exact p-value
## with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: no_supplement$no_supplement_ph_mean_wk1 and no_supplement$no_supplement_ph_mean_wk3
## V = 94, p-value = 0.7781
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.7781
#No Supplement Plot
no_supplement_plot <- curated_ph_wkly %>%
filter(supplement_consumed == "none",
study_week == "week1" | study_week == "week3") %>%
ggplot(aes(x = study_week,
y = ph_mean)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = NULL,
y = "Average weekly pH",
title = "Change in pH without Supplement") +
theme(legend.position = "none")
#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
plot = no_supplement_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)
no_supplement_plot
In testing our control (no supplement) group, we hypothesized that there would be no significant result, and that is exactly what we got as the p-value was greater than alpha = .10. Thus we accept the null hypothesis stating that there was not a decrease on average for participants who did not consume a supplement from week 1 to week 3. We surprisingly found the data sets to be bimodal when we graphed them and did we not have a large sample size leading us to use a non-parametric (wilcox) statistical test. In addition, we found the variences to be equal between the two subsetted data sets.
#Finding the Mean Difference in pH between Week 3 and Week 1 for Each Supplement (Week 3 - Week 1)
The fact that some supplements caused a significant decrease in pH and others did not suggests that not all supplements produce the same results and that there might be an overarching supplement that caused the lowest drop in pH. Our goal here is to determine which supplement causes the largest mean difference in pH. We will only test the statistically significant data sets found above:
#BRMPS 2x Daily Mean pH Difference
mean_diff_brmps_2x <- brmps_2x %>%
select(participant_id, brmps_ph_mean_2x_wk1, brmps_ph_mean_2x_wk3) %>%
mutate(ph_difference = brmps_ph_mean_2x_wk3 - brmps_ph_mean_2x_wk1) %>%
summarize(avg_ph_difference = mean(ph_difference))
View(mean_diff_brmps_2x)
#pH Difference = -0.3414815
This calculation tells us that participants who consumed BRMPS 2x daily had an average decrease of .34 in pH from week 1 to week 3.
#Transition Maize 2x Daily Mean pH Difference
mean_diff_transition_2x <- transition_2x %>%
select(participant_id, transition_ph_mean_2x_wk1, transition_ph_mean_2x_wk3) %>%
mutate(ph_difference = transition_ph_mean_2x_wk3 - transition_ph_mean_2x_wk1) %>%
summarize(avg_ph_difference = mean(ph_difference))
View(mean_diff_transition_2x)
#pH Difference = -0.2895238
This calculation tells us that participants who consumed Transition Maize 2x daily had an average decrease of .29 in pH from week 1 to week 3.
#HiMaize+BRMPS 1x Daily Mean pH Difference
mean_diff_himaize_brmps_1x <- himaize_brmps_1x %>%
select(participant_id, himaize_brmps_ph_mean_1x_wk1, himaize_brmps_ph_mean_1x_wk3) %>%
mutate(ph_difference = himaize_brmps_ph_mean_1x_wk3 - himaize_brmps_ph_mean_1x_wk1) %>%
summarize(avg_ph_difference = mean(ph_difference))
View(mean_diff_himaize_brmps_1x)
#pH Difference = -0.2556522
This calculation tells us that participants who consumed HiMaize+BRMPS 1x daily had an average decrease of .26 in pH from week 1 to week 3.
As we can see, 2x Daily BRMPS causes the largest mean decrease in pH from weak 1 to weak 3, and thus we will focus on this supplement for the remainder of our statistical study.
#Species Diversity in BRMPS 2x Daily Individuals
#Import Data
seq_var_table <- read_delim("raw_data/species_avg_shared.txt",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, na=c("NA"),
col_types = list()) %>%
rename_all(tolower) %>%
select(participant_id, study_week, semester, starts_with("Lactoc"), starts_with("Lactob"))
seq_var_table
## # A tibble: 1,202 x 44
## participant_id study_week semester `lactococcus la… lactococcus
## <chr> <chr> <chr> <dbl> <dbl>
## 1 U042 week1 Fall2015 0 0
## 2 U042 week3 Fall2015 0 0
## 3 U044 week3 Fall2015 1.5 0
## 4 U046 week3 Fall2015 0 0
## 5 U048 week3 Fall2015 0 0
## 6 U050 week3 Fall2015 0 0
## 7 U051 week1 Fall2015 0 0
## 8 U052 week1 Fall2015 0 0
## 9 U052 week3 Fall2015 13 0
## 10 U053 week1 Fall2015 4 0
## # … with 1,192 more rows, and 39 more variables: `lactococcus
## # raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## # garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## # murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## # johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## # ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## # timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## # crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## # `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## # `lactobacillus crispatus` <dbl>, `lactobacillus
## # apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## # `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## # `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## # `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## # `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## # `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## # `lactobacillus rhamnosus` <dbl>, `lactobacillus
## # kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## # `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## # `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## # `lactobacillus algidus` <dbl>, `lactobacillus
## # acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>
#Lactobacillus
#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
## participant_id study_week status semester supplement_cons… use_data
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 U445 week3 during Fall2017 BRMPS yes
## 2 U446 week3 during Fall2017 BRMPS yes
## 3 U448 week3 during Fall2017 BRMPS yes
## 4 U449 week3 during Fall2017 BRMPS yes
## 5 U450 week3 during Fall2017 BRMPS yes
## 6 U452 week3 during Fall2017 BRMPS yes
## 7 U453 week3 during Fall2017 BRMPS yes
## 8 U455 week3 during Fall2017 BRMPS yes
## 9 U458 week3 during Fall2017 BRMPS yes
## 10 U460 week3 during Fall2017 BRMPS yes
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## # frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacillus <- inner_join(seq_var_table, brmps_2x_phylo) %>% #combine two phyloseq objects created above
select(-starts_with("lactoco")) #get rid of lactococcus
## Joining, by = c("participant_id", "study_week", "semester")
lactobacillus
## # A tibble: 54 x 46
## participant_id study_week semester `lactobacillus … `lactobacillus …
## <chr> <chr> <chr> <dbl> <dbl>
## 1 U500 week1 Winter2… 0 0
## 2 U500 week3 Winter2… 0 0
## 3 U501 week1 Winter2… 0 0
## 4 U501 week3 Winter2… 0 0
## 5 U502 week1 Winter2… 0 0
## 6 U502 week3 Winter2… 0 0
## 7 U507 week1 Winter2… 0 0
## 8 U507 week3 Winter2… 0 0
## 9 U509 week1 Winter2… 0 0
## 10 U509 week3 Winter2… 0 0
## # … with 44 more rows, and 41 more variables: `lactobacillus
## # sanfranciscensis` <dbl>, `lactobacillus johnsonii` <dbl>,
## # lactobacillaceae <dbl>, `lactobacillus ruminis` <dbl>, `lactobacillus
## # salivarius` <dbl>, `lactobacillus timonensis` <dbl>,
## # lactobacillus <dbl>, `lactobacillus crispatus/gallinarum` <dbl>,
## # `lactobacillus gallinarum` <dbl>, `lactobacillus iners` <dbl>,
## # `lactobacillus vaginalis` <dbl>, `lactobacillus crispatus` <dbl>,
## # `lactobacillus apodemi/murinus` <dbl>, `lactobacillus
## # fermentum` <dbl>, `lactobacillus gasseri/johnsonii` <dbl>,
## # lactobacillales <dbl>, `lactobacillus casei` <dbl>, `lactobacillus
## # brevis` <dbl>, `lactobacillus buchneri` <dbl>, `lactobacillus
## # delbrueckii` <dbl>, `lactobacillus apis` <dbl>, `lactobacillus
## # hokkaidonensis` <dbl>, `lactobacillus agilis` <dbl>, `lactobacillus
## # casei group` <dbl>, `lactobacillus rhamnosus` <dbl>, `lactobacillus
## # kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## # `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## # `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## # `lactobacillus algidus` <dbl>, `lactobacillus
## # acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## # status <chr>, supplement_consumed <chr>, use_data <chr>,
## # quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## # ph_mean <dbl>
#Calculate Row Sums
lactobacillus_total <- rowSums(lactobacillus[,4:39])
lactobacillus_total
## [1] 22.00 280.50 0.00 0.00 0.00 0.00 0.00 0.00 5.33 13.67
## [11] 0.00 0.00 0.00 48.50 5.00 81.33 1.00 0.00 0.00 115.50
## [21] 0.00 0.00 59.00 56.00 0.00 2.33 0.00 0.00 188.00 31.33
## [31] 36.50 474.66 27.00 6.67 0.00 0.00 0.00 24.00 23.34 47.66
## [41] 0.67 1.67 3.00 0.00 0.00 0.00 0.00 340.00 29.66 77.99
## [51] 116.00 565.67 0.00 0.67
#Mutate Row Sums into Previous Table
new_lactobacillus <- lactobacillus %>%
mutate(lactobacillus = lactobacillus_total) %>%
select(participant_id, study_week, semester, lactobacillus, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacillus
## # A tibble: 54 x 8
## participant_id study_week semester lactobacillus ph_median ph_mean
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 U500 week1 Winter2… 22 6.6 6.6
## 2 U500 week3 Winter2… 280. 6.45 6.45
## 3 U501 week1 Winter2… 0 6.6 6.6
## 4 U501 week3 Winter2… 0 6.2 6.2
## 5 U502 week1 Winter2… 0 6.4 6.4
## 6 U502 week3 Winter2… 0 6.4 6.4
## 7 U507 week1 Winter2… 0 6.7 6.7
## 8 U507 week3 Winter2… 0 6.4 6.4
## 9 U509 week1 Winter2… 5.33 6.8 6.8
## 10 U509 week3 Winter2… 13.7 6 6
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## # supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactobacillus <- new_lactobacillus %>%
filter(study_week == "week3")
wk_3_lactobacillus_plot <- wk_3_lactobacillus %>%
ggplot(aes(x = ph_mean,
y = lactobacillus)) +
geom_point() + #puts data points to match x and y coordinates
geom_smooth(method = "lm", #used to create a linear best fit line
se = FALSE) + #hides confidence interval around line
xlab("Week 3 Mean pH") +
ylab("Relative Abundance of Lactobacillus Bacteria")
wk_3_lactobacillus_plot
#Correlation Test
lactobacillus_correlation <- wk_3_lactobacillus %>%
lm(ph_mean ~ lactobacillus, data = .) #test relationship
summary(lactobacillus_correlation) #view results
##
## Call:
## lm(formula = ph_mean ~ lactobacillus, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9645 -0.3357 0.1972 0.2716 1.0468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1284049 0.1065108 57.538 <2e-16 ***
## lactobacillus 0.0004436 0.0006286 0.706 0.487
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4872 on 25 degrees of freedom
## Multiple R-squared: 0.01953, Adjusted R-squared: -0.01969
## F-statistic: 0.4979 on 1 and 25 DF, p-value: 0.4869
lactobacillus_correlation
##
## Call:
## lm(formula = ph_mean ~ lactobacillus, data = .)
##
## Coefficients:
## (Intercept) lactobacillus
## 6.1284049 0.0004436
#p-value = .4869
#R-squared = .01953
Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .4869) indicates that the null hypothesis is true meaning there is no correlation between pH and Lactobacillus genre bacteria. Additionally, the small r-squared value indicates that only about 1.9% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis which is opposite to what we hypothesized, as here the number of Lactobacillus bacteria increases as pH increases.
#Lactococcus
#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
## participant_id study_week status semester supplement_cons… use_data
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 U445 week3 during Fall2017 BRMPS yes
## 2 U446 week3 during Fall2017 BRMPS yes
## 3 U448 week3 during Fall2017 BRMPS yes
## 4 U449 week3 during Fall2017 BRMPS yes
## 5 U450 week3 during Fall2017 BRMPS yes
## 6 U452 week3 during Fall2017 BRMPS yes
## 7 U453 week3 during Fall2017 BRMPS yes
## 8 U455 week3 during Fall2017 BRMPS yes
## 9 U458 week3 during Fall2017 BRMPS yes
## 10 U460 week3 during Fall2017 BRMPS yes
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## # frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactococcus <- inner_join(seq_var_table, brmps_2x_phylo) %>% #combine two phyloseq objects created above
select(-starts_with("lactoba")) #get rid of lactococcus
## Joining, by = c("participant_id", "study_week", "semester")
lactococcus
## # A tibble: 54 x 15
## participant_id study_week semester `lactococcus la… lactococcus
## <chr> <chr> <chr> <dbl> <dbl>
## 1 U500 week1 Winter2… 0 0
## 2 U500 week3 Winter2… 0 0
## 3 U501 week1 Winter2… 0 0
## 4 U501 week3 Winter2… 5.67 0
## 5 U502 week1 Winter2… 0 0
## 6 U502 week3 Winter2… 0 0
## 7 U507 week1 Winter2… 17 0
## 8 U507 week3 Winter2… 0.5 0
## 9 U509 week1 Winter2… 0 0
## 10 U509 week3 Winter2… 8.67 0
## # … with 44 more rows, and 10 more variables: `lactococcus
## # raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## # garvieae` <dbl>, status <chr>, supplement_consumed <chr>,
## # use_data <chr>, quantity_compliant <chr>, frequency <chr>,
## # ph_median <dbl>, ph_mean <dbl>
#Calculate Row Sums
lactococcus_total <- rowSums(lactococcus[,4:8])
lactococcus_total
## [1] 0.00 0.00 0.00 5.67 0.00 0.00 17.00 0.50 0.00 8.67 4.00
## [12] 3.67 0.00 0.00 1.67 0.00 3.00 20.67 0.00 0.00 0.67 1.67
## [23] 0.00 0.00 0.67 1.67 1.33 0.00 2.00 0.00 7.00 0.00 3.67
## [34] 0.00 0.00 0.00 0.00 15.00 3.67 10.00 4.67 3.67 0.33 0.00
## [45] 0.33 0.00 4.00 0.00 0.33 1.67 6.50 0.67 5.67 4.00
#Mutate Row Sums into Previous Table
new_lactococcus <- lactococcus %>%
mutate(lactococcus = lactococcus_total) %>%
select(participant_id, study_week, semester, lactococcus, ph_median, ph_mean, frequency, supplement_consumed)
new_lactococcus
## # A tibble: 54 x 8
## participant_id study_week semester lactococcus ph_median ph_mean
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 U500 week1 Winter2… 0 6.6 6.6
## 2 U500 week3 Winter2… 0 6.45 6.45
## 3 U501 week1 Winter2… 0 6.6 6.6
## 4 U501 week3 Winter2… 5.67 6.2 6.2
## 5 U502 week1 Winter2… 0 6.4 6.4
## 6 U502 week3 Winter2… 0 6.4 6.4
## 7 U507 week1 Winter2… 17 6.7 6.7
## 8 U507 week3 Winter2… 0.5 6.4 6.4
## 9 U509 week1 Winter2… 0 6.8 6.8
## 10 U509 week3 Winter2… 8.67 6 6
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## # supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactococcus <- new_lactococcus %>%
filter(study_week == "week3")
wk_3_lactococcus_plot <- wk_3_lactococcus %>%
ggplot(aes(x = ph_mean,
y = lactococcus)) +
geom_point() + #puts data points to match x and y coordinates
geom_smooth(method = "lm", #used to create a linear best fit line
se = FALSE) + #hides confidence interval around line
xlab("Week 3 Mean pH") +
ylab("Relative Abundance of Lactococcus Bacteria")
wk_3_lactococcus_plot
#Correlation Test
lactococcus_correlation <- wk_3_lactococcus %>%
lm(ph_mean ~ lactococcus, data = .) #test relationship
summary(lactococcus_correlation) #view results
##
## Call:
## lm(formula = ph_mean ~ lactococcus, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9090 -0.3441 0.1001 0.3160 1.0910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.10904 0.10678 57.212 <2e-16 ***
## lactococcus 0.01909 0.01838 1.039 0.309
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4817 on 25 degrees of freedom
## Multiple R-squared: 0.04137, Adjusted R-squared: 0.00302
## F-statistic: 1.079 on 1 and 25 DF, p-value: 0.3089
lactococcus_correlation
##
## Call:
## lm(formula = ph_mean ~ lactococcus, data = .)
##
## Coefficients:
## (Intercept) lactococcus
## 6.10904 0.01909
#p-value = .3089
#R-squared = .04137
Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .3089) indicates that the null hypothesis is true meaning there is no correlation between pH and Lactococcus genre bacteria. Additionally, the small r-squared value indicates that only about 4.1% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis, which is opposite to what we hypothesized, as here the number of Lactococcus bacteria increases as pH increases.
We figured since everybody has a different microbiome, that some individuals may contain Lactococcus or just Lactobacillus genre bacteria and since they perform similar functions we thought it would be reasonable to combine them into one data set and see if there was a correlation between “Lacto” Bacteria and pH to compensate for different microbiome makeups.
#"Lacto" Genre Bacteria
#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
## participant_id study_week status semester supplement_cons… use_data
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 U445 week3 during Fall2017 BRMPS yes
## 2 U446 week3 during Fall2017 BRMPS yes
## 3 U448 week3 during Fall2017 BRMPS yes
## 4 U449 week3 during Fall2017 BRMPS yes
## 5 U450 week3 during Fall2017 BRMPS yes
## 6 U452 week3 during Fall2017 BRMPS yes
## 7 U453 week3 during Fall2017 BRMPS yes
## 8 U455 week3 during Fall2017 BRMPS yes
## 9 U458 week3 during Fall2017 BRMPS yes
## 10 U460 week3 during Fall2017 BRMPS yes
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## # frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacteria <- inner_join(seq_var_table, brmps_2x_phylo)
## Joining, by = c("participant_id", "study_week", "semester")
lactobacteria
## # A tibble: 54 x 51
## participant_id study_week semester `lactococcus la… lactococcus
## <chr> <chr> <chr> <dbl> <dbl>
## 1 U500 week1 Winter2… 0 0
## 2 U500 week3 Winter2… 0 0
## 3 U501 week1 Winter2… 0 0
## 4 U501 week3 Winter2… 5.67 0
## 5 U502 week1 Winter2… 0 0
## 6 U502 week3 Winter2… 0 0
## 7 U507 week1 Winter2… 17 0
## 8 U507 week3 Winter2… 0.5 0
## 9 U509 week1 Winter2… 0 0
## 10 U509 week3 Winter2… 8.67 0
## # … with 44 more rows, and 46 more variables: `lactococcus
## # raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## # garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## # murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## # johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## # ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## # timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## # crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## # `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## # `lactobacillus crispatus` <dbl>, `lactobacillus
## # apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## # `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## # `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## # `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## # `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## # `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## # `lactobacillus rhamnosus` <dbl>, `lactobacillus
## # kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## # `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## # `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## # `lactobacillus algidus` <dbl>, `lactobacillus
## # acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## # status <chr>, supplement_consumed <chr>, use_data <chr>,
## # quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## # ph_mean <dbl>
#Calculate Row Sums
lactobacteria_total <- rowSums(lactobacteria[,4:44])
lactobacteria_total
## [1] 22.00 280.50 0.00 5.67 0.00 0.00 17.00 0.50 5.33 22.34
## [11] 4.00 3.67 0.00 48.50 6.67 81.33 4.00 20.67 0.00 115.50
## [21] 0.67 1.67 59.00 56.00 0.67 4.00 1.33 0.00 190.00 31.33
## [31] 43.50 474.66 30.67 6.67 0.00 0.00 0.00 39.00 27.01 57.66
## [41] 5.34 5.34 3.33 0.00 0.33 0.00 4.00 340.00 29.99 79.66
## [51] 122.50 566.34 5.67 4.67
#Mutate Row Sums into Previous Table
new_lactobacteria <- lactobacteria %>%
mutate(lactobacteria = lactobacteria_total) %>%
select(participant_id, study_week, semester, lactobacteria, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacteria
## # A tibble: 54 x 8
## participant_id study_week semester lactobacteria ph_median ph_mean
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 U500 week1 Winter2… 22 6.6 6.6
## 2 U500 week3 Winter2… 280. 6.45 6.45
## 3 U501 week1 Winter2… 0 6.6 6.6
## 4 U501 week3 Winter2… 5.67 6.2 6.2
## 5 U502 week1 Winter2… 0 6.4 6.4
## 6 U502 week3 Winter2… 0 6.4 6.4
## 7 U507 week1 Winter2… 17 6.7 6.7
## 8 U507 week3 Winter2… 0.5 6.4 6.4
## 9 U509 week1 Winter2… 5.33 6.8 6.8
## 10 U509 week3 Winter2… 22.3 6 6
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## # supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactobacteria <- new_lactobacteria %>%
filter(study_week == "week3")
wk_3_lactobacteria_plot <- wk_3_lactobacteria %>%
ggplot(aes(x = ph_mean,
y = lactobacteria)) +
geom_point() + #puts data points to match x and y coordinates
geom_smooth(method = "lm", #used to create a linear best fit line
se = FALSE) + #hides confidence interval around line
xlab("Week 3 Mean pH") +
ylab("Relative Abundance of Lactate Producers")
wk_3_lactobacteria_plot
#Correlation Test
lactobacteria_correlation <- wk_3_lactobacteria %>%
lm(ph_mean ~ lactobacteria, data = .) #test relationship
summary(lactobacteria_correlation) #view results
##
## Call:
## lm(formula = ph_mean ~ lactobacteria, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9631 -0.3334 0.1928 0.2740 1.0488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1247251 0.1074485 57.002 <2e-16 ***
## lactobacteria 0.0004724 0.0006326 0.747 0.462
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4866 on 25 degrees of freedom
## Multiple R-squared: 0.02182, Adjusted R-squared: -0.01731
## F-statistic: 0.5577 on 1 and 25 DF, p-value: 0.4622
lactobacteria_correlation
##
## Call:
## lm(formula = ph_mean ~ lactobacteria, data = .)
##
## Coefficients:
## (Intercept) lactobacteria
## 6.1247251 0.0004724
#p-value = .4622
#R-squared = .02182
Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .4622) indicates that the null hypothesis is true meaning there is no correlation between pH and “Lacto” bacteria. Additionally, the small r-squared value indicates that only about 2.2% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis, which is opposite to what we hypothesized, as here the number of “Lacto” bacteria increases as pH increases.
#Did the Number of Lactate Producers Rise?
Because our correlation tests did not find any significant results, we decided to look a little deeper to see if the supplement BRMPS taken 2x daily did at least increase the number of Lactate producers (Lactococcus and Lactobacillus bacteria). These Lactate producers may not be correlated with more acidic conditions, but they confer a variety of health benefits such as decreased gut permeability (inhibiting toxic bacterial biproducts from being absorbed) as well as disease resistance and increased immunity. Additionally, they produce acidic biproducts which may contribute to lower gut pH.
#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
filter(supplement_consumed == "BRMPS",
frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
## participant_id study_week status semester supplement_cons… use_data
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 U445 week3 during Fall2017 BRMPS yes
## 2 U446 week3 during Fall2017 BRMPS yes
## 3 U448 week3 during Fall2017 BRMPS yes
## 4 U449 week3 during Fall2017 BRMPS yes
## 5 U450 week3 during Fall2017 BRMPS yes
## 6 U452 week3 during Fall2017 BRMPS yes
## 7 U453 week3 during Fall2017 BRMPS yes
## 8 U455 week3 during Fall2017 BRMPS yes
## 9 U458 week3 during Fall2017 BRMPS yes
## 10 U460 week3 during Fall2017 BRMPS yes
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## # frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacteria <- inner_join(seq_var_table, brmps_2x_phylo)
## Joining, by = c("participant_id", "study_week", "semester")
lactobacteria
## # A tibble: 54 x 51
## participant_id study_week semester `lactococcus la… lactococcus
## <chr> <chr> <chr> <dbl> <dbl>
## 1 U500 week1 Winter2… 0 0
## 2 U500 week3 Winter2… 0 0
## 3 U501 week1 Winter2… 0 0
## 4 U501 week3 Winter2… 5.67 0
## 5 U502 week1 Winter2… 0 0
## 6 U502 week3 Winter2… 0 0
## 7 U507 week1 Winter2… 17 0
## 8 U507 week3 Winter2… 0.5 0
## 9 U509 week1 Winter2… 0 0
## 10 U509 week3 Winter2… 8.67 0
## # … with 44 more rows, and 46 more variables: `lactococcus
## # raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## # garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## # murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## # johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## # ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## # timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## # crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## # `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## # `lactobacillus crispatus` <dbl>, `lactobacillus
## # apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## # `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## # `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## # `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## # `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## # `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## # `lactobacillus rhamnosus` <dbl>, `lactobacillus
## # kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## # `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## # `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## # `lactobacillus algidus` <dbl>, `lactobacillus
## # acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## # status <chr>, supplement_consumed <chr>, use_data <chr>,
## # quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## # ph_mean <dbl>
#Calculate Row Sums
lactobacteria_total <- rowSums(lactobacteria[,4:44])
lactobacteria_total
## [1] 22.00 280.50 0.00 5.67 0.00 0.00 17.00 0.50 5.33 22.34
## [11] 4.00 3.67 0.00 48.50 6.67 81.33 4.00 20.67 0.00 115.50
## [21] 0.67 1.67 59.00 56.00 0.67 4.00 1.33 0.00 190.00 31.33
## [31] 43.50 474.66 30.67 6.67 0.00 0.00 0.00 39.00 27.01 57.66
## [41] 5.34 5.34 3.33 0.00 0.33 0.00 4.00 340.00 29.99 79.66
## [51] 122.50 566.34 5.67 4.67
#Mutate Row Sums into Previous Table
new_lactobacteria <- lactobacteria %>%
mutate(lactobacteria = lactobacteria_total) %>%
select(participant_id, study_week, semester, lactobacteria, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacteria
## # A tibble: 54 x 8
## participant_id study_week semester lactobacteria ph_median ph_mean
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 U500 week1 Winter2… 22 6.6 6.6
## 2 U500 week3 Winter2… 280. 6.45 6.45
## 3 U501 week1 Winter2… 0 6.6 6.6
## 4 U501 week3 Winter2… 5.67 6.2 6.2
## 5 U502 week1 Winter2… 0 6.4 6.4
## 6 U502 week3 Winter2… 0 6.4 6.4
## 7 U507 week1 Winter2… 17 6.7 6.7
## 8 U507 week3 Winter2… 0.5 6.4 6.4
## 9 U509 week1 Winter2… 5.33 6.8 6.8
## 10 U509 week3 Winter2… 22.3 6 6
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## # supplement_consumed <chr>
#Weekly Data
wk_1_lactobacteria <- new_lactobacteria %>%
filter(study_week == "week1")
wk_3_lactobacteria <- new_lactobacteria %>%
filter(study_week == "week3")
#Normality
shapiro.test(wk_1_lactobacteria$lactobacteria)
##
## Shapiro-Wilk normality test
##
## data: wk_1_lactobacteria$lactobacteria
## W = 0.5561, p-value = 6.575e-08
shapiro.test(wk_3_lactobacteria$lactobacteria)
##
## Shapiro-Wilk normality test
##
## data: wk_3_lactobacteria$lactobacteria
## W = 0.60405, p-value = 2.305e-07
#Normality Plots
ggplot(wk_1_lactobacteria, aes(x = lactobacteria)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(wk_3_lactobacteria, aes(x = lactobacteria)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(wk_1_lactobacteria$lactobacteria); qqline(wk_1_lactobacteria$lactobacteria)
qqnorm(wk_3_lactobacteria$lactobacteria); qqline(wk_3_lactobacteria$lactobacteria)
#Final Joined Data
stat_lactobacteria <- inner_join(x = wk_1_lactobacteria, y = wk_3_lactobacteria,
by = c("participant_id", "frequency",
"semester", "supplement_consumed")) %>%
rename(lactobacteria_wk1_abund = lactobacteria.x,
lactobacteria_wk3_abund = lactobacteria.y) %>%
select(-starts_with("study_week"))
#Sample Size + Varience Assumptions
str(stat_lactobacteria) # n=26
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 26 obs. of 10 variables:
## $ participant_id : chr "U500" "U501" "U502" "U507" ...
## $ semester : chr "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
## $ lactobacteria_wk1_abund: num 22 0 0 17 5.33 4 0 6.67 4 0 ...
## $ ph_median.x : num 6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
## $ ph_mean.x : num 6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
## $ frequency : chr "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
## $ supplement_consumed : chr "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
## $ lactobacteria_wk3_abund: num 280.5 5.67 0 0.5 22.34 ...
## $ ph_median.y : num 6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
## $ ph_mean.y : num 6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
var.test(x = stat_lactobacteria$lactobacteria_wk1_abund,
y = stat_lactobacteria$lactobacteria_wk3_abund,
alternative = "greater") #p-value= 1
##
## F test to compare two variances
##
## data: stat_lactobacteria$lactobacteria_wk1_abund and stat_lactobacteria$lactobacteria_wk3_abund
## F = 0.080474, num df = 25, denom df = 25, p-value = 1
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.04115382 Inf
## sample estimates:
## ratio of variances
## 0.08047412
#Statistical Tests
wilcox.test(x = stat_lactobacteria$lactobacteria_wk1_abund,
y = stat_lactobacteria$lactobacteria_wk3_abund,
alternative = "less", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## stat_lactobacteria$lactobacteria_wk1_abund, : cannot compute exact p-value
## with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: stat_lactobacteria$lactobacteria_wk1_abund and stat_lactobacteria$lactobacteria_wk3_abund
## V = 58, p-value = 0.007803
## alternative hypothesis: true location shift is less than 0
#p-value= .007803
#Violin Plot for Visualization
brmps_2x_lactobacteria_plot <- new_lactobacteria %>%
filter(study_week == "week1" | study_week == "week3") %>%
ggplot(aes(x = study_week,
y = lactobacteria)) +
geom_violin(aes(color = study_week)) +
geom_jitter(aes(color = study_week))+
labs(x = "Study Week",
y = "Lacto Bacteria Relative Abundance",
title = "Weekly Comparison of Abundances of Lacto Bacteria") +
theme(legend.position = "none")
brmps_2x_lactobacteria_plot
#Save Plot
save_plot(filename = "brmps_2x_lactobacteria_plot.pdf",
plot = brmps_2x_lactobacteria_plot,
nrow = 1, ncol = 1,
base_aspect_ratio = 1.1)